Data visualization - Plotly Exercises - ex.7 - Adam Trentowski, 162602

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.datasets import load_wine, load_iris
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

Scatter Plot¶

In [2]:
# Iris sample data bundled with Plotly Express.
df = px.data.iris()

# Sepal width against sepal length; marker color is mapped to petal length.
fig = px.scatter(
    df,
    x="sepal_width",
    y="sepal_length",
    color="petal_length",
)
fig.show()

Changed: scatter plot on the wine dataset¶

In [3]:
# Load the wine dataset once and reuse the returned object, instead of calling
# load_wine() separately for the data, the column names and the target.
wine = load_wine()
df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
df['class'] = wine.target  # class label of each sample

# Alcohol against color intensity; marker color is mapped to the 'hue' feature.
fig = px.scatter(df, x="alcohol", y="color_intensity", color='hue')
fig.show()

Bar Chart¶

In [4]:
# Keep only the rows for Canada from the gapminder sample data.
data_canada = px.data.gapminder().query("country == 'Canada'")

# One bar per value of 'year'; bar height is taken from the 'pop' column.
fig = px.bar(
    data_canada,
    x="year",
    y="pop",
)
fig.show()

Changed: average alcohol per wine class (bar chart)¶

In [5]:
# NOTE: `df` must still be the wine DataFrame (with 'class' and 'alcohol'
# columns) built in the wine scatter-plot cell above; this cell does not
# reload it.
mean_alcohol_by_class = df.groupby('class')['alcohol'].mean().reset_index()

fig = px.bar(mean_alcohol_by_class, x='class', y='alcohol',
             labels={'class': 'Class of Wine', 'alcohol': 'Average Alcohol (%)'},
             title='Average alcohol content in each wine class',
             color='alcohol',
             text='alcohol')

# Show the bar labels with two decimals instead of the raw float mean.
fig.update_traces(texttemplate='%{text:.2f}')
# Class ids are discrete labels, so treat the x axis as categorical rather
# than as a continuous numeric axis.
fig.update_xaxes(type='category')

fig.show()

Pie Chart¶

In [6]:
# Tips sample data bundled with Plotly Express.
df = px.data.tips()

# Slices are labelled by the 'day' column and sized by the 'tip' column.
fig = px.pie(
    df,
    names="day",
    values="tip",
)
fig.show()

Changed: class distribution of the wine dataset (pie chart)¶

In [7]:
# Load the wine dataset once and reuse the returned object, instead of calling
# load_wine() separately for the data, the column names and the target.
wine = load_wine()
df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
df['class'] = wine.target  # class label of each sample

# Number of samples per class; the index holds the class ids.
class_counts = df['class'].value_counts()

fig = px.pie(values=class_counts,
             names=class_counts.index,
             title='Classes in dataset',
             color_discrete_sequence=px.colors.qualitative.Set1)
fig.show()

Histogram¶

In [8]:
# Seeded generator so that Restart & Run All reproduces the same figure.
rng = np.random.default_rng(0)

x0 = rng.standard_normal(500)
# Add 1 to shift the mean of the Gaussian distribution
x1 = rng.standard_normal(500) + 1

fig = go.Figure()
# Name the traces so the legend says which sample is which.
fig.add_trace(go.Histogram(x=x0, name='mean 0'))
fig.add_trace(go.Histogram(x=x1, name='mean 1'))

# Overlay both histograms
fig.update_layout(barmode='overlay')
# Reduce opacity to see both histograms
fig.update_traces(opacity=0.75)
fig.show()

Changed: sepal width per iris class (overlaid histograms)¶

In [9]:
# Load the iris dataset once and reuse the returned object, instead of calling
# load_iris() separately for the data, the column names and the target.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['class'] = iris.target  # class label of each sample

fig = go.Figure()

# One histogram trace of sepal width per class.
for class_id in df['class'].unique():
    # Single .loc selection (rows and column at once) instead of chained indexing.
    class_data = df.loc[df['class'] == class_id, 'sepal width (cm)']
    fig.add_trace(go.Histogram(x=class_data, name=f'Class {class_id}'))

# Draw the histograms on top of each other, semi-transparent so all stay visible.
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()

Box Plot¶

In [10]:
# Tips sample data bundled with Plotly Express.
df = px.data.tips()

# One box of 'total_bill' values for each value of 'time'.
fig = px.box(
    df,
    x="time",
    y="total_bill",
)
fig.show()

Changed: sepal length per iris class (box plot)¶

In [11]:
# Load the iris dataset once and reuse the returned object, instead of calling
# load_iris() separately for the data, the column names and the target.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['class'] = iris.target  # class label of each sample

# One box of sepal length values per class.
fig = px.box(df, x="class", y="sepal length (cm)")
fig.show()

KNN - classification¶

In [12]:
mesh_size = .02   # step of the prediction grid
margin = 0.25     # padding added around the data range on both axes

# Load and split data. The labels are converted to strings so they can be
# compared with the '0' / '1' labels listed in trace_specs below.
X, y = make_moons(noise=0.3, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(
    X, y.astype(str), test_size=0.25, random_state=0)

# Create a mesh grid on which we will run our model
x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
xrange = np.arange(x_min, x_max, mesh_size)
yrange = np.arange(y_min, y_max, mesh_size)
xx, yy = np.meshgrid(xrange, yrange)

# Create classifier, run predictions on grid.
# Fit on the training split only: the plot distinguishes train from test
# points, so the test points must not take part in fitting the model.
clf = KNeighborsClassifier(15, weights='uniform')
clf.fit(X_train, y_train)
# Take column 1 of the predicted probabilities for every grid point.
Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
Z = Z.reshape(xx.shape)

# (points, labels, label to select, split name, marker symbol) for each trace.
trace_specs = [
    [X_train, y_train, '0', 'Train', 'square'],
    [X_train, y_train, '1', 'Train', 'circle'],
    [X_test, y_test, '0', 'Test', 'square-dot'],
    [X_test, y_test, '1', 'Test', 'circle-dot']
]

# The loop variables are named `points` / `labels` so they do not shadow the
# module-level X and y inside the comprehension.
fig = go.Figure(data=[
    go.Scatter(
        x=points[labels == label, 0], y=points[labels == label, 1],
        name=f'{split} Split, Label {label}',
        mode='markers', marker_symbol=marker
    )
    for points, labels, label, split, marker in trace_specs
])
# Applied before the contour is added, so only the scatter traces are styled.
fig.update_traces(
    marker_size=12, marker_line_width=1.5,
    marker_color="lightyellow"
)

# Semi-transparent contour of the grid predictions behind the points.
fig.add_trace(
    go.Contour(
        x=xrange,
        y=yrange,
        z=Z,
        showscale=False,
        colorscale='RdBu',
        opacity=0.4,
        name='Score',
        hoverinfo='skip'
    )
)
fig.show()

Changed: KNN classification on the iris dataset (first two features)¶

In [13]:
# Load the iris dataset once and reuse the returned object, instead of calling
# load_iris() separately for the data, the column names and the target.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['class'] = iris.target  # class label of each sample

X = df.iloc[:, :2].values  # first two feature columns only, for a 2D plot
y = df['class'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

mesh_size = .02   # step of the prediction grid
margin = 0.25     # padding added around the data range on both axes

# Grid covering the data range (plus margin) on which the model is evaluated.
x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin
xrange = np.arange(x_min, x_max, mesh_size)
yrange = np.arange(y_min, y_max, mesh_size)
xx, yy = np.meshgrid(xrange, yrange)

# Fit on the training split only and predict a class for every grid point.
clf = KNeighborsClassifier(n_neighbors=15, weights='uniform')
clf.fit(X_train, y_train)

Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

fig = go.Figure()

# Per-class styling: filled symbols for train points, open symbols for test points.
symbols = ['circle', 'square', 'diamond']
symbols_open = ['circle-open', 'square-open', 'diamond-open']
colors = ['blue', 'red', 'green']

# Iterate over the class ids actually present in y instead of a hard-coded
# range(3). zip stops at the shortest sequence, so at most len(symbols)
# classes are drawn.
for class_id, symbol, symbol_open, color in zip(np.unique(y), symbols, symbols_open, colors):
    train_mask = y_train == class_id
    test_mask = y_test == class_id

    fig.add_trace(go.Scatter(x=X_train[train_mask, 0], y=X_train[train_mask, 1],
                             mode='markers', marker_symbol=symbol,
                             marker_color=color, name=f'Train Class {class_id}'))

    fig.add_trace(go.Scatter(x=X_test[test_mask, 0], y=X_test[test_mask, 1],
                             mode='markers', marker_symbol=symbol_open,
                             marker_line_color='black', marker_color=color,
                             name=f'Test Class {class_id}'))

# Predicted class over the grid, drawn semi-transparent behind the points.
# The trace is named so hover text does not fall back to a generic trace label.
fig.add_trace(go.Contour(x=xrange, y=yrange, z=Z, showscale=False,
                         line_width=0, colorscale='Viridis', opacity=0.5,
                         name='Predicted class'))

fig.update_layout(title='KNN Classification on Iris Dataset with 2 Features',
                  xaxis=dict(title='Sepal Length (cm)'),
                  yaxis=dict(title='Sepal Width (cm)'),
                  legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))

fig.show()